library(bigMap)

Load data

D <- as.matrix(read.csv('../sierpinski3d.mtx', sep = '', header = F))

Initialize (compute bandwidths)

# set range of perplexities
ppxs <- round(nrow(D) *c(.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99), 0)
# compute betas
G <- bdm.init(D, dSet.name = 's3d', is.distance = T, ppx = ppxs, threads = 4)

Run ptSNE

g.list <- bdm.ptsne(D, G, lRate = NULL, theta = .0, qlty = T, threads = 1, layers = 1)
# save
save(g.list, file = './glist.RData')

Embedding cost/size function

nulL <- lapply(g.list, function(g) bdm.cost(g))

Output

# source graph plot
source('../graphs.R')
# load edge matrix
E <- as.matrix(read.csv('../sierpinski3d.edg', sep = '', header = F))
# plot
nulL <- lapply(g.list, function(g) graph.plot(g, E))

hl-Correlation

g.list <- lapply(g.list, function(g) bdm.hlCorr(D, g, zSampleSize = 1000, threads = 4))
save(g.list, file = './glist.RData')
hlTable <- t(sapply(g.list, function(g) summary(g$hlC)))
rownames(hlTable) <- sapply(g.list, function(g) g$ppx$ppx)
knitr::kable(hlTable, caption = 'hl-Correlation') %>%
  kable_styling(full_width = F)
hl-Correlation
Min. 1st Qu. Median Mean 3rd Qu. Max.
20 0.3056392 0.3056392 0.3056392 0.3056392 0.3056392 0.3056392
102 0.2741325 0.2741325 0.2741325 0.2741325 0.2741325 0.2741325
205 0.5928886 0.5928886 0.5928886 0.5928886 0.5928886 0.5928886
410 0.7046438 0.7046438 0.7046438 0.7046438 0.7046438 0.7046438
615 0.7426570 0.7426570 0.7426570 0.7426570 0.7426570 0.7426570
820 0.7050900 0.7050900 0.7050900 0.7050900 0.7050900 0.7050900
1025 0.7270396 0.7270396 0.7270396 0.7270396 0.7270396 0.7270396
1230 0.7351854 0.7351854 0.7351854 0.7351854 0.7351854 0.7351854
1435 0.7399226 0.7399226 0.7399226 0.7399226 0.7399226 0.7399226
1640 0.7407318 0.7407318 0.7407318 0.7407318 0.7407318 0.7407318
1845 0.7384741 0.7384741 0.7384741 0.7384741 0.7384741 0.7384741
1948 0.7426465 0.7426465 0.7426465 0.7426465 0.7426465 0.7426465
2030 0.7427596 0.7427596 0.7427596 0.7427596 0.7427596 0.7427596

Kary-neighborhood preservation

g.list <- lapply(g.list, function(g) bdm.knp(D, g, k.max = NULL, sampling = 0.9, threads = 4))
save(g.list, file = './glist.RData')
bdm.knp.plot(g.list, ppxfrmt = 0)

Running Times

rTimes <- sapply(g.list, function(g) c(g$ppx$t[3], g$t$epoch, g$t$ptsne[3], (g$ppx$t[3] +g$t$ptsne[3])))
rTimes <- round(rTimes, 1)
colnames(rTimes) <- sapply(g.list, function(g) g$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'pt-SNE', 'total')
knitr::kable(rTimes, caption = 'Computation times (s)') %>%
  kable_styling(full_width = F)
Computation times (s)
20 102 205 410 615 820 1025 1230 1435 1640 1845 1948 2030
betas 0.1 0.2 0.3 0.5 0.7 0.8 0.7 0.9 0.7 0.7 0.6 0.5 0.5
epoch 1.2 1.5 1.7 2.0 1.9 1.9 1.9 1.9 1.9 1.9 1.9 1.9 1.9
pt-SNE 83.2 66.2 67.8 68.8 63.0 63.2 63.4 63.2 63.3 63.1 63.3 63.1 63.0
total 83.3 66.4 68.1 69.3 63.7 64.0 64.1 64.1 63.9 63.8 63.9 63.6 63.5

Run on: Intel(R) Xeon(R) CPU E31225 @ 3.10GHz, 4 cores, 16GB RAM.